countMatrix <- ReadDataFrameFromTsv(file.name.path="../../data/refSEQ_countMatrix.txt")
## ../../data/refSEQ_countMatrix.txt read from disk!
# head(countMatrix)
designMatrix <- ReadDataFrameFromTsv(file.name.path="../../design/all_samples_short_names_noRS2HC7.tsv")
## ../../design/all_samples_short_names_noRS2HC7.tsv read from disk!
# head(designMatrix)
# rownames <- as.character(rownames(countMatrix))
# rownames <- rownames[order(rownames)]
# rownames.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
# filter.values=rownames, attrs=c("external_gene_name",
# "mgi_symbol", "entrezgene"))
#
# noNaCountMatrix <- attachGeneColumnToDf(mainDf=countMatrix,
# genesMap=rownames.map,
# rowNamesIdentifier="entrezgene",
# mapFromIdentifier="entrezgene",
# mapToIdentifier="external_gene_name")
filteredCountsProp <- filterLowCounts(counts.dataframe=countMatrix,
is.normalized=FALSE,
design.dataframe=designMatrix,
cond.col.name="gcondition",
method.type="Proportion")
## features dimensions before normalization: 27179
## Filtering out low count features...
## 14454 features are to be kept for differential expression analysis with filtering method 3
PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="condition", colorColname="genotype", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=TRUE, prefix.plot="Prop-Un-Norm")
## [1] TRUE
# pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
# pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
# pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(filteredCountsProp), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Prop-Un-Norm")
# plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
Loading Negative Control Genes to normalize data
library(readxl)
sd.ctrls <- read_excel(path="../../data/controls/Additional File 4 full list of BMC genomics SD&RS2.xlsx", sheet=1)
sd.ctrls <- sd.ctrls[order(sd.ctrls$adj.P.Val),]
sd.neg.ctrls <- sd.ctrls[sd.ctrls$adj.P.Val > 0.9, ]
sd.neg.ctrls <- sd.neg.ctrls$`MGI Symbol`
sd.neg.ctrls <- sd.neg.ctrls[-which(is.na(sd.neg.ctrls))]
int.neg.ctrls <- sd.neg.ctrls
int.neg.ctrls <- unique(int.neg.ctrls)
# neg.map <- convertGenesViaMouseDb(gene.list=int.neg.ctrls, fromType="SYMBOL",
# "ENTREZID")
# sum(is.na(neg.map$ENTREZID))
# neg.ctrls.entrez <- as.character(neg.map$ENTREZID)
neg.map <- convertGenesViaBiomart(specie="mm10", filter="mgi_symbol",
filter.values=int.neg.ctrls, c("external_gene_name",
"mgi_symbol", "entrezgene"))
neg.map.nna <- neg.map[-which(is.na(neg.map$entrezgene)),]
neg.ctrls.entrez <- as.character(neg.map.nna$entrezgene)
ind.ctrls <- which(rownames(filteredCountsProp) %in% neg.ctrls.entrez)
counts.neg.ctrls <- filteredCountsProp[ind.ctrls,]
# pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Neg Ctrls not Norm")
# pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Neg Ctrls not Norm")
# pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(counts.neg.ctrls), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="Neg Ctrls not Norm")
# plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
Positive Control Genes
## sleep deprivation
sd.lit.pos.ctrls <- read_excel("../../data/controls/SD_RS_PosControls_final.xlsx",
sheet=1)
colnames(sd.lit.pos.ctrls) <- sd.lit.pos.ctrls[1,]
sd.lit.pos.ctrls <- sd.lit.pos.ctrls[-1,]
sd.est.pos.ctrls <- read_excel("../../data/controls/SD_RS_PosControls_final.xlsx",
sheet=3)
sd.pos.ctrls <- cbind(sd.est.pos.ctrls$`MGI Symbol`, "est")
sd.pos.ctrls <- rbind(sd.pos.ctrls, cbind(sd.lit.pos.ctrls$Gene, "lit"))
sd.pos.ctrls <- sd.pos.ctrls[-which(duplicated(sd.pos.ctrls[,1])),]
sd.pos.ctrls <- sd.pos.ctrls[-which(is.na(sd.pos.ctrls[,1])),]
normPropCountsUqua <- NormalizeData(data.to.normalize=filteredCountsProp,
norm.type="tmm",
design.matrix=designMatrix)
PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="condition", colorColname="genotype", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=TRUE, prefix.plot="TMM-Norm")
## [1] TRUE
# pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="TMM-Norm")
# pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="TMM-Norm")
# pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normPropCountsUqua), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="TMM-Norm")
# plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
pal <- RColorBrewer::brewer.pal(9, "Set1")
plotRLE(as.matrix(normPropCountsUqua), outline=FALSE, col=pal[designMatrix$gcondition])
K=5
library(RUVSeq)
neg.ctrl.list <- rownames(counts.neg.ctrls)
groups <- makeGroups(designMatrix$gcondition)
ruvedSExprData <- RUVs(as.matrix(round(normPropCountsUqua)), cIdx=neg.ctrl.list,
scIdx=groups, k=5)
normExprData <- ruvedSExprData$normalizedCounts
PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="condition", colorColname="genotype", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=TRUE, prefix.plot="TMM+RUV-Norm")
## [1] TRUE
# pc1_2 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC2", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="TMM+RUV-Norm")
# pc2_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC2", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="TMM+RUV-Norm")
# pc1_3 <- PlotPCAPlotlyFunction(counts.data.frame=log1p(normExprData), design.matrix=designMatrix, shapeColname="genotype", colorColname="condition", xPCA="PC1", yPCA="PC3", plotly.flag=TRUE, show.plot.flag=FALSE, prefix.plot="TMM+RUV-Norm")
# plotly::subplot(pc1_2, pc2_3, pc1_3, nrows=2, margin = 0.1, titleX=TRUE, titleY=TRUE)
pal <- RColorBrewer::brewer.pal(9, "Set1")
plotRLE(normExprData, outline=FALSE, col=pal[designMatrix$gcondition])
### edgering
padj.thr <- 0.05
desMat <- cbind(designMatrix, ruvedSExprData$W)
colnames(desMat) <- c(colnames(designMatrix), colnames(ruvedSExprData$W))
cc <- c("WTSD5 - WTHC5", "KOHC5 - WTHC5",
"KOSD5 - WTSD5", "KOSD5 - KOHC5")
rescList1 <- applyEdgeR(counts=filteredCountsProp, design.matrix=desMat,
factors.column="gcondition",
weight.columns=c("W_1", "W_2", "W_3", "W_4", "W_5"),
contrasts=cc, useIntercept=FALSE, p.threshold=1,
is.normalized=FALSE, verbose=TRUE)
names <- names(rescList1)
rescList1 <- lapply(seq_along(rescList1), function(i)
{
attachMeans(normalized.counts=normExprData, design.matrix=desMat,
factor.column="gcondition", contrast.name=names(rescList1)[i],
de.results=rescList1[[i]])
})
names(rescList1) <- names
PlotHistPvalPlot(de.results=rescList1[[1]], design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[1])
## mapping ensembl gene id using biomart
# sd.pos.ctrls <- sd.pos.ctrls$`MGI Symbol`
# sd.pos.ctrls <- sd.pos.ctrls[-which(is.na(sd.pos.ctrls))]
# length(sd.pos.ctrls)
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[1]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
genesMap=res.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
# res.o.map1 <- convertGenesViaMouseDb(gene.list=rownames(rescList1[[1]]),
# fromType="ENTREZID")
#
# res.o <- attachGeneColumnToDf(mainDf=rescList1[[1]],
# genesMap=res.o.map1,
# rowNamesIdentifier="ENTREZID",
# mapFromIdentifier="ENTREZID",
# mapToIdentifier="SYMBOL")
WriteDataFrameAsTsv(data.frame.to.save=res.o,
file.name.path=paste0(names(rescList1)[1], "_edgeR"))
vp <- luciaVolcanoPlot(res.o, sd.pos.ctrls, prefix=names(rescList1)[1],
threshold=padj.thr)
ggplotly(vp)
# vp <- PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData,
# design.matrix=desMat,
# show.plot.flag=FALSE, plotly.flag=FALSE, save.plot=FALSE,
# prefix.plot=names(rescList1)[1], threshold=padj.thr)#,
# #positive.ctrls.list=sd.pos.ctrls)
de <- sum(res.o$FDR < padj.thr)
nde <- sum(res.o$FDR >= padj.thr)
detable <- cbind(de,nde)
rownames(detable) <- names(rescList1)[1]
ddetable <- detable
tot.ctrls <- dim(sd.pos.ctrls)[1]
idx.pc <- which(tolower(res.o$gene) %in% tolower(sd.pos.ctrls[,1]))
tot.pc.de <- sum(res.o$FDR[idx.pc] < padj.thr)
tot.pc.nde <- length(idx.pc) - tot.pc.de
pos.df <- cbind(tot.ctrls, tot.pc.de, tot.pc.nde)
colnames(pos.df) <- c("total_p.ctrl", "p.ctrl_de_mapped",
"p.ctrl_notde_mapped")
rownames(pos.df) <- names(rescList1)[1]
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)
PlotHistPvalPlot(de.results=rescList1[[2]], design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[2])
rs2.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[2]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.rs2.o <- attachGeneColumnToDf(mainDf=rescList1[[2]],
genesMap=rs2.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
# rs2.o.map <- convertGenesViaMouseDb(gene.list=rownames(rescList1[[2]]),
# fromType="ENTREZID")
#
# res.rs2.o <- attachGeneColumnToDf(mainDf=rescList1[[2]],
# genesMap=rs2.o.map,
# rowNamesIdentifier="ENTREZID",
# mapFromIdentifier="ENTREZID",
# mapToIdentifier="SYMBOL")
WriteDataFrameAsTsv(data.frame.to.save=res.rs2.o,
file.name.path=paste0(names(rescList1)[2], "_edgeR"))
vp <- luciaVolcanoPlot(res.rs2.o, positive.controls.df=NULL,
prefix=names(rescList1)[2],
threshold=padj.thr)
ggplotly(vp)
# PlotVolcanoPlot(de.results=res.rs2.o, counts.dataframe=normExprData,
# design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE,
# prefix.plot=names(rescList1)[2], threshold=padj.thr,
# positive.ctrls.list=rs2.pos.ctrls)
de <- sum(res.rs2.o$FDR < padj.thr)
nde <- sum(res.rs2.o$FDR >= padj.thr)
detable <- cbind(de,nde)
rownames(detable) <- names(rescList1)[2]
ddetable <- rbind(ddetable, detable)
#
# tot.ctrls <- dim(rs.pos.ctrls)[1]
# idx.pc <- which(tolower(res.o$gene) %in% tolower(rs.pos.ctrls[,1]))
# tot.pc.de <- sum(res.o$FDR[idx.pc] < padj.thr)
# tot.pc.nde <- length(idx.pc) - tot.pc.de
# pos.dff <- cbind(tot.ctrls, tot.pc.de, tot.pc.nde)
# rownames(pos.dff) <- names(rescList1)[2]
# pos.df <- rbind(pos.df, pos.dff)
# # colnames(pos.df) <- c("total p.ctrl", "p.ctrl de mapped",
# # "p.ctrl not de mapped")
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[2], threshold=0.05)
PlotHistPvalPlot(de.results=rescList1[[3]], design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[3])
## mapping ensembl gene id using biomart
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[3]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.o <- attachGeneColumnToDf(mainDf=rescList1[[3]],
genesMap=res.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
# res.o.map <- convertGenesViaMouseDb(gene.list=rownames(rescList1[[2]]),
# fromType="ENTREZID")
#
# res.o <- attachGeneColumnToDf(mainDf=rescList1[[3]],
# genesMap=res.o.map,
# rowNamesIdentifier="ENTREZID",
# mapFromIdentifier="ENTREZID",
# mapToIdentifier="SYMBOL")
WriteDataFrameAsTsv(data.frame.to.save=res.o,
file.name.path=paste0(names(rescList1)[3], "_edgeR"))
vp <- luciaVolcanoPlot(res.o, positive.controls.df=sd.pos.ctrls,
prefix=names(rescList1)[3], threshold=padj.thr)
ggplotly(vp)
de <- sum(res.o$FDR < padj.thr)
nde <- sum(res.o$FDR >= padj.thr)
detable <- cbind(de,nde)
rownames(detable) <- names(rescList1)[3]
ddetable <- rbind(ddetable, detable)
tot.ctrls <- dim(sd.pos.ctrls)[1]
idx.pc <- which(tolower(res.o$gene) %in% tolower(sd.pos.ctrls[,1]))
tot.pc.de <- sum(res.o$FDR[idx.pc] < padj.thr)
tot.pc.nde <- length(idx.pc) - tot.pc.de
pos.dff <- cbind(tot.ctrls, tot.pc.de, tot.pc.nde)
rownames(pos.dff) <- names(rescList1)[2]
pos.df <- rbind(pos.df, pos.dff)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)
PlotHistPvalPlot(de.results=rescList1[[4]], design.matrix=desMat,
show.plot.flag=TRUE, plotly.flag=TRUE,
prefix.plot=names(rescList1)[4])
## mapping ensembl gene id using biomart
res.o.map <- convertGenesViaBiomart(specie="mm10", filter="entrezgene",
filter.values=rownames(rescList1[[4]]),
c("external_gene_name", "mgi_symbol", "entrezgene"))
res.o <- attachGeneColumnToDf(mainDf=rescList1[[4]],
genesMap=res.o.map,
rowNamesIdentifier="entrezgene",
mapFromIdentifier="entrezgene",
mapToIdentifier="external_gene_name")
# res.o.map <- convertGenesViaMouseDb(gene.list=rownames(rescList1[[4]]),
# fromType="ENTREZID")
#
# res.o <- attachGeneColumnToDf(mainDf=rescList1[[4]],
# genesMap=res.o.map,
# rowNamesIdentifier="ENTREZID",
# mapFromIdentifier="ENTREZID",
# mapToIdentifier="SYMBOL")
WriteDataFrameAsTsv(data.frame.to.save=res.o,
file.name.path=paste0(names(rescList1)[4], "_edgeR"))
vp <- luciaVolcanoPlot(res.o, positive.controls.df=sd.pos.ctrls,
prefix=names(rescList1)[4], threshold=padj.thr)
ggplotly(vp)
#
# PlotVolcanoPlot(de.results=res.o, counts.dataframe=normExprData,
# design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE,
# prefix.plot=names(rescList1)[4], threshold=padj.thr,
# positive.ctrls.list=NULL)
de <- sum(res.o$FDR < padj.thr)
nde <- sum(res.o$FDR >= padj.thr)
detable <- cbind(de,nde)
rownames(detable) <- names(rescList1)[4]
ddetable <- rbind(ddetable, detable)
tot.ctrls <- dim(sd.pos.ctrls)[1]
idx.pc <- which(tolower(res.o$gene) %in% tolower(sd.pos.ctrls[,1]))
tot.pc.de <- sum(res.o$FDR[idx.pc] < padj.thr)
tot.pc.nde <- length(idx.pc) - tot.pc.de
pos.dff <- cbind(tot.ctrls, tot.pc.de, tot.pc.nde)
rownames(pos.dff) <- names(rescList1)[2]
pos.df <- rbind(pos.df, pos.dff)
# PlotMAPlotCounts(de.results=res.o, counts.dataframe=normExprData, design.matrix=desMat,
# show.plot.flag=TRUE, plotly.flag=TRUE, save.plot=FALSE, prefix.plot=names(rescList1)[1], threshold=0.05)
ddetable
## de nde
## WTSD5 - WTHC5 5603 8851
## KOHC5 - WTHC5 39 14415
## KOSD5 - WTSD5 80 14374
## KOSD5 - KOHC5 5714 8740
pos.df
## total_p.ctrl p.ctrl_de_mapped p.ctrl_notde_mapped
## WTSD5 - WTHC5 579 439 98
## KOHC5 - WTHC5 579 4 533
## KOHC5 - WTHC5 579 438 99